This document approaches Zoom’s historical analysis from a quantitative perspective; it is meant to be a complementary document to the case write-up & Excel worksheet submitted by Team-7.
# Convert the identifier columns to factors so the later split()/spread()
# calls treat them as categories rather than character values.
for (col in c("Parameter", "Ticker", "Year")) {
  df_tall[[col]] <- as.factor(df_tall[[col]])
}
# show the final data format
str(df_tall)## 'data.frame': 780 obs. of 4 variables:
## $ Parameter: Factor w/ 12 levels " AP/Revenues ",..: 10 10 10 10 10 10 10 10 10 10 ...
## $ Ticker : Factor w/ 13 levels "EGHT","FIVN",..: 13 13 13 13 13 12 12 12 12 12 ...
## $ Year : Factor w/ 5 levels "2018","2019",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ Value : num -0.03 0.03 0.01 0.16 0.22 0.05 0 0 0.01 0 ...
Wrangle data in preparation for the clustering analysis
# spread data around year: one column per year, rows keyed by Parameter x Ticker
df <- spread(df_tall, key = Year, value = Value)
# split into a list of data frames, one per financial ratio,
# dropping the now-redundant Parameter column from each
dfl <- lapply(split(df, f = df$Parameter),
              function(d) subset(d, select = -Parameter))
summary(dfl)## Length Class Mode
## AP/Revenues 6 data.frame list
## AR/Revenues 6 data.frame list
## Capital Turnover 6 data.frame list
## Cash/Revenue 6 data.frame list
## COGS/Revenue 6 data.frame list
## EBIT Margin 6 data.frame list
## Inventories/Revenue 6 data.frame list
## NPP&E/Revenues 6 data.frame list
## Prepaid Expenses/Revenues 6 data.frame list
## ROIC 6 data.frame list
## SG&A/Revenue 6 data.frame list
## WCR/Revenues 6 data.frame list
str(dfl$` AP/Revenues `)## 'data.frame': 13 obs. of 6 variables:
## $ Ticker: Factor w/ 13 levels "EGHT","FIVN",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ 2018 : num 0.12 0.03 0.27 0.01 0.11 0.12 0.06 0.02 0.07 0.02 ...
## $ 2019 : num 0.15 0.03 0.27 0.03 0.11 0.11 0.01 0.04 0.08 0.03 ...
## $ 2020 : num 0.2 0.04 0.27 0.01 0.09 0.16 0.01 0.05 0.08 0.02 ...
## $ 2021 : num 0.11 0.03 0.27 0.1 0.11 0.19 0.03 0.04 0.1 0.02 ...
## $ 2022 : num 0.17 0.03 0.27 0.08 0 0.22 0.03 0.03 0.12 0.03 ...
# Use each data frame's Ticker column as its row names, then drop the column
# so only the numeric year columns remain for the distance-based clustering.
# seq_along() (not 1:length()) is safe if the list is ever empty.
for (item in seq_along(dfl)) {
  rownames(dfl[[item]]) <- dfl[[item]]$Ticker
  dfl[[item]]$Ticker <- NULL
}
head(dfl$` EBIT Margin `, 7)## 2018 2019 2020 2021 2022
## EGHT -0.11 -0.25 -0.36 -0.27 -0.24
## FIVN 0.03 0.01 -0.03 -0.09 -0.11
## NICE 0.14 0.15 0.15 0.14 0.15
## PLTR -0.08 -0.27 -1.07 -0.78 -1.05
## QUMU -0.28 -0.21 -0.20 -0.76 0.00
## RBBN -0.05 0.01 0.04 0.02 -0.04
## RBLX -0.27 -0.15 -0.29 -0.26 -0.42
# build the clusterization function
# Fit k-means (25 random starts) on `dataframe` and bundle the result:
#   $cluster_table  - one-column data.frame ("Cluster") of assignments,
#                     one row per observation
#   $visualization  - fviz_cluster plot of the fitted solution
build_cluster_map <- function(dataframe, number_of_clusters) {
  km_fit <- kmeans(dataframe, centers = number_of_clusters, nstart = 25)
  assignments <- as.data.frame(km_fit$cluster)
  colnames(assignments)[1] <- "Cluster"
  list(
    cluster_table = assignments,
    visualization = fviz_cluster(km_fit, data = dataframe)
  )
}
# Run the "how many clusters?" workflow on `dataframe` and assemble one figure:
#   row A - three optimum-k diagnostics (average silhouette width, total
#           within-cluster sum of squares, gap statistic), each marked with a
#           red dashed line at the k actually chosen;
#   row B - the cluster scatter plot and the cluster-membership table for
#           `number_of_clusters` clusters.
# NOTE(review): the misspelled name ("analyize") is kept because the rest of
# this document calls it by that name.
analyize_cluster <- function(dataframe, number_of_clusters = 3) {
  k_max <- 10  # largest k explored by all three diagnostics
  # Average Silhouette Width Analysis
  p1 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "silhouette") +
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Average Silhouette Width") +
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
  # Total Within Sum of Squares ("elbow") Analysis
  p2 <- fviz_nbclust(dataframe, kmeans, k.max = k_max, method = "wss") +
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Total Within Sum of Squares") +
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
  # Gap Statistics (k) Analysis; B = 50 bootstrap reference sets
  gap_stat <- clusGap(dataframe, FUN = kmeans, nstart = 25, K.max = k_max, B = 50)
  p3 <- fviz_gap_stat(gap_stat) +
    theme_minimal() + theme(axis.title.y = element_blank()) +
    ggtitle("Gap Statistics (k)") +
    geom_vline(xintercept = number_of_clusters, linetype = 2, col = "red")
  # Cluster Analysis at the chosen k
  cluster_map <- build_cluster_map(dataframe, number_of_clusters)
  p4 <- cluster_map$visualization +
    theme_minimal() +
    ggtitle(paste("Cluster Plot for", number_of_clusters, "Clusters"))
  # Render the membership table as a grob so it can sit inside the figure
  p5 <- ggplot() +
    theme_minimal() +
    annotation_custom(tableGrob(cluster_map$cluster_table))
  ggarrange(
    ggarrange(p1, p2, p3, widths = c(1, 1), ncol = 3),
    ggarrange(p4, p5, widths = c(2, 1), ncol = 2),
    nrow = 2, labels = c("A", "B"), heights = c(2, 3)
  )
}

In this section we will utilize principal components to reduce the 12 financial ratios into two dimensions. We understand that this may cause loss of information due to the reduction method, but our hypothesis is that performing clustering analysis on the principal components may reveal associations between Zoom & other companies that we cannot see by analyzing individual financial ratios.
# spread data around Parameter: one column per financial ratio
dfw <- pivot_wider(df_tall, names_from = Parameter, values_from = Value)
# pin the year levels so split() yields the years in chronological order
dfw$Year <- factor(dfw$Year, levels = c(2018, 2019, 2020, 2021, 2022))
dfw <- as.data.frame(dfw)
# split into a list of data frames, one per year, dropping the Year column
dfwl <- lapply(split(dfw, f = dfw$Year),
               function(d) subset(d, select = -Year))
summary(dfwl)## Length Class Mode
## 2018 13 data.frame list
## 2019 13 data.frame list
## 2020 13 data.frame list
## 2021 13 data.frame list
## 2022 13 data.frame list
head(dfwl$'2018', 7)## Ticker ROIC EBIT Margin Capital Turnover COGS/Revenue SG&A/Revenue
## 1 ZM -0.03 -0.03 1.01 0.20 0.72
## 6 VG 0.05 0.05 0.96 0.41 0.48
## 11 TWLO -0.13 -0.18 0.73 0.46 0.45
## 16 TEAM -0.05 -0.05 0.45 0.20 0.38
## 21 SPLK -0.13 -0.14 0.93 0.20 0.72
## 26 RNG -0.02 -0.02 0.97 0.23 0.64
## 31 RBLX -0.45 -0.27 1.68 0.22 0.23
## NPP&E/Revenues Cash/Revenue AR/Revenues Inventories/Revenue
## 1 0.09 0.92 0.16 0
## 6 0.05 0.00 0.07 0
## 11 0.10 1.15 0.15 0
## 16 0.06 1.97 0.07 0
## 21 0.18 0.89 0.30 0
## 26 0.22 0.84 0.14 0
## 31 0.29 0.87 0.16 0
## Prepaid Expenses/Revenues WCR/Revenues AP/Revenues
## 1 0.13 0.84 0.02
## 6 0.00 -0.03 0.05
## 11 0.04 1.23 0.13
## 16 0.02 1.62 0.02
## 21 0.00 0.91 0.07
## 26 0.02 0.97 0.02
## 31 0.03 0.49 0.06
# Use Ticker as row name, drop the Ticker column, then z-score every ratio
# column so no single differently-scaled ratio dominates the k-means distances.
for (item in seq_along(dfwl)) {
  rownames(dfwl[[item]]) <- dfwl[[item]]$Ticker
  dfwl[[item]]$Ticker <- NULL
  for (column in seq_along(dfwl[[item]])) {
    # scale() returns a 1-column matrix; as.numeric() stores a plain vector
    # instead of embedding a matrix inside the data.frame column
    dfwl[[item]][[column]] <- as.numeric(scale(dfwl[[item]][[column]]))
  }
}
head(dfwl$'2018', 7)## ROIC EBIT Margin Capital Turnover COGS/Revenue SG&A/Revenue
## ZM 0.57805435 0.3814197 -0.05057769 -1.0834921 1.2945956
## VG 0.97883871 1.0425472 -0.16799018 1.0755834 -0.2589191
## TWLO 0.07707391 -0.8581944 -0.70808763 1.5896490 -0.4531084
## TEAM 0.47785827 0.2161378 -1.36559756 -1.0834921 -0.9062169
## SPLK 0.07707391 -0.5276306 -0.23843767 -1.0834921 1.2945956
## RNG 0.62815240 0.4640606 -0.14450768 -0.7750528 0.7767573
## RBLX -1.52606350 -1.6019628 1.52274966 -0.8778659 -1.8771636
## NPP&E/Revenues Cash/Revenue AR/Revenues Inventories/Revenue
## ZM -0.22226978 0.3402703 0.04632342 -0.2773501
## VG -0.72479275 -1.4214754 -0.85698318 -0.2773501
## TWLO -0.09663903 0.7807067 -0.05404398 -0.2773501
## TEAM -0.59916201 2.3509583 -0.85698318 -0.2773501
## SPLK 0.90840692 0.2828220 1.45146701 -0.2773501
## RNG 1.41092989 0.1870750 -0.15441138 -0.2773501
## RBLX 2.29034510 0.2445232 0.04632342 -0.2773501
## Prepaid Expenses/Revenues WCR/Revenues AP/Revenues
## ZM 3.0705979 0.4999849 -0.8220927
## VG -0.5582905 -1.2507363 -0.4057081
## TWLO 0.5582905 1.2847910 0.7046509
## TEAM 0.0000000 2.0695970 -0.8220927
## SPLK -0.5582905 0.6408475 -0.1281183
## RNG 0.0000000 0.7615869 -0.8220927
## RBLX 0.2791453 -0.2043282 -0.2669132
df_to_analyze <- dfwl$`2018`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfwl$`2019`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)
The two graphs above highlight that ZM shared a cluster with SPLK
& RNG during 2018, but it has its own cluster in 2019. It is
expected for Zoom to drift in its similarities post-IPO; hence, we will
run the same analysis again with a more recent year.
# identify the optimum cluster size for the most recent year available
df_to_analyze <- dfwl[["2022"]]
analyize_cluster(df_to_analyze, 5)
We have noticed that ZM again has its own cluster during 2022,
which suggests that ZM has some uniqueness in its financial ratios
compared to other companies in the sample; we will try to uncover that
relationship in the next section by limiting the parameters of the
principal components analysis.
In this section we will rerun the principal components analysis on the ROIC, EBIT Margin, and Capital Turnover parameters only.
# build a limited data frame restricted to ROIC, EBIT Margin & Capital Turnover
dfwll <- dfwl
# drop every other (already-scaled) ratio column from each yearly data frame;
# column names carry the original leading/trailing spaces from the source data
for (item in seq_along(dfwll)) {
  dfwll[[item]] <- select(dfwll[[item]],
                          -c(' COGS/Revenue ',
                             ' SG&A/Revenue ',
                             ' NPP&E/Revenues ',
                             ' Cash/Revenue ',
                             ' AR/Revenues ',
                             ' Inventories/Revenue ',
                             ' Prepaid Expenses/Revenues ',
                             ' WCR/Revenues ',
                             ' AP/Revenues '
                          )
  )
}
head(dfwll$'2019', 7)## ROIC EBIT Margin Capital Turnover
## ZM 0.8957509 0.8238972 1.11702451
## VG 0.6814301 0.7529656 0.06848243
## TWLO 0.2527886 -1.6587069 -1.45487116
## TEAM 0.3956691 0.3273764 -0.30741002
## SPLK 0.1099081 -0.3110075 -0.88114059
## RNG 0.3956691 0.3273764 -0.40632908
## RBLX -1.3903373 -0.3819391 1.90837703
df_to_analyze <- dfwll$`2018`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfwll$`2019`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

df_to_analyze <- dfwll$`2020`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfwll$`2021`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

df_to_analyze <- dfwll$`2022`
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)
The analysis above suggests that principal components clusterization over
ROIC, EBIT Margin, and Capital Turnover shows that ZM has its
own cluster in 2019, while it shared the cluster with
VG, TEAM, RNG, RBBN, FIVN in 2018, yet that cluster has
changed over the following years 2020-2022. Hence, we will be extracting
the cluster data to Excel to run another cluster analysis on the
combined grouping to identify the best companies to be considered as
ZM comps.
df_to_analyze <- dfl$` AP/Revenues `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfl$` AR/Revenues `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfl$` Capital Turnover `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

df_to_analyze <- dfl$` Cash/Revenue `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

df_to_analyze <- dfl$` EBIT Margin `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfl$` NPP&E/Revenues `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 6)

df_to_analyze <- dfl$` ROIC `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)

df_to_analyze <- dfl$` SG&A/Revenue `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 4)

df_to_analyze <- dfl$` WCR/Revenues `
# identify the optimum cluster size
analyize_cluster(df_to_analyze, 5)